import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Load the raw form-responses export and preview the first five rows.
df = pd.read_csv(filepath_or_buffer="CAC 2 (Responses) - Form Responses 1.csv")
df.head(n=5)
Timestamp | Course | State | Gender | Overall how would you rate your mental health?\n | Is there any history of mental health disorder in your family?\n | On average, how many hours do you sleep per day?\n | Have you ever used the counselling service provided by the college? | Family Structure | Is this the first time you're staying away from home? | How is prayer and meditation important in your daily life? | On average, how stressed are you? | How long have you been in Lavasa? | What strategies do you use to cope with stress and anxiety in your daily life? | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 10/10/2023 21:44:21 | MSc DS | Kerala | Male | 4 | No | 4-6 hours | No | Joint Family | Yes | 4 | 4 | Less than 6 months | Meditation |
1 | 10/10/2023 22:14:11 | MSc DS | Kerala | Female | 2 | No | Less than 4 hours | No | Nuclear Family | No | 3 | 4 | Less than 6 months | Listening music |
2 | 10/10/2023 22:15:16 | MSc DS | Uttar Pradesh | Female | 4 | No | 4-6 hours | No | Nuclear Family | No | 3 | 4 | Less than 6 months | Listening music |
3 | 10/10/2023 22:19:15 | MSc DS | Kerala | Female | 2 | No | 7-8 hours | Yes | Nuclear Family | No | 3 | 4 | Less than 6 months | Sleeping |
4 | 10/10/2023 22:28:00 | MSc DS | Other | Male | 3 | No | 4-6 hours | No | Nuclear Family | No | 5 | 4 | Less than 6 months | Sports |
# Preview the last five survey responses.
df.tail()
Timestamp | Course | State | Gender | Overall how would you rate your mental health?\n | Is there any history of mental health disorder in your family?\n | On average, how many hours do you sleep per day?\n | Have you ever used the counselling service provided by the college? | Family Structure | Is this the first time you're staying away from home? | How is prayer and meditation important in your daily life? | On average, how stressed are you? | How long have you been in Lavasa? | What strategies do you use to cope with stress and anxiety in your daily life? | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
310 | 10/24/2023 14:33:02 | MBA | Kerala | Male | 4 | No | Less than 4 hours | No | Nuclear Family | Yes | 3 | 4 | Less than 6 months | Sleeping |
311 | 10/24/2023 14:33:26 | MBA | Kerala | Female | 4 | No | Less than 4 hours | No | Joint Family | No | 3 | 4 | Less than 6 months | Listening music |
312 | 10/24/2023 14:35:22 | MBA | Kerala | Female | 4 | No | Less than 4 hours | Yes | Joint Family | No | 2 | 5 | Less than 6 months | Sleeping |
313 | 10/24/2023 14:36:03 | MBA | Tamil Nadu | Female | 4 | No | Less than 4 hours | No | Joint Family | No | 3 | 4 | Less than 6 months | Listening music |
314 | 10/24/2023 14:36:55 | MBA | West Bengal | Male | 4 | No | 4-6 hours | No | Joint Family | Yes | 4 | 4 | Less than 6 months | Sports |
# List the raw column names (the full survey questions).
df.columns
Index(['Timestamp', 'Course', 'State', 'Gender', 'Overall how would you rate your mental health?\n', 'Is there any history of mental health disorder in your family?\n', 'On average, how many hours do you sleep per day?\n', 'Have you ever used the counselling service provided by the college?', 'Family Structure', 'Is this the first time you're staying away from home?', 'How is prayer and meditation important in your daily life?', 'On average, how stressed are you? ', 'How long have you been in Lavasa?', 'What strategies do you use to cope with stress and anxiety in your daily life?'], dtype='object')
# Discard the submission timestamp column.
df.drop(columns="Timestamp", inplace=True)

# Swap the long survey questions for short column names.
# The assignment is positional: the order must match the remaining columns.
df.columns = [
    "Course",
    "State",
    "Gender",
    "Mental Health Rating",
    "Family History",
    "Sleep Duration",
    "Counselling Service Usage",
    "Family Structure",
    "First Time Away From Home",
    "Importance of Prayer,Meditation",
    "Stress Score",
    "Length of Stay",
    "Coping Strategies",
]

# "Other" answers for state/course are beyond the scope of the study:
# mark them as missing so the dropna below removes those rows.
for scoped_column in ("State", "Course"):
    df[scoped_column] = df[scoped_column].replace("Other", np.nan)

# Abbreviate the longest course name for ease of display.
df["Course"] = df["Course"].replace({"MSc Global Finance and Analytics": "Msc GFA"})

# Remove every row that has a missing value in any column.
df.dropna(axis=0, how="any", inplace=True)
df.head()
Course | State | Gender | Mental Health Rating | Family History | Sleep Duration | Counselling Service Usage | Family Structure | First Time Away From Home | Importance of Prayer,Meditation | Stress Score | Length of Stay | Coping Strategies | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | MSc DS | Kerala | Male | 4 | No | 4-6 hours | No | Joint Family | Yes | 4 | 4 | Less than 6 months | Meditation |
1 | MSc DS | Kerala | Female | 2 | No | Less than 4 hours | No | Nuclear Family | No | 3 | 4 | Less than 6 months | Listening music |
2 | MSc DS | Uttar Pradesh | Female | 4 | No | 4-6 hours | No | Nuclear Family | No | 3 | 4 | Less than 6 months | Listening music |
3 | MSc DS | Kerala | Female | 2 | No | 7-8 hours | Yes | Nuclear Family | No | 3 | 4 | Less than 6 months | Sleeping |
5 | MSc DS | West Bengal | Female | 1 | No | 4-6 hours | No | Nuclear Family | Yes | 3 | 5 | Less than 6 months | Watching movies/series |
# (rows, columns) of the data frame at this point.
df.shape
(303, 13)
# Per-column summary: number of missing values and number of distinct values.
count_df = pd.DataFrame(
    {
        "Null Values": df.isna().sum(),
        "Unique Values": df.nunique(),
    },
    index=df.columns,
)
count_df
Null Values | Unique Values | |
---|---|---|
Course | 0 | 13 |
State | 0 | 27 |
Gender | 0 | 3 |
Mental Health Rating | 0 | 5 |
Family History | 0 | 2 |
Sleep Duration | 0 | 4 |
Counselling Service Usage | 0 | 2 |
Family Structure | 0 | 2 |
First Time Away From Home | 0 | 2 |
Importance of Prayer,Meditation | 0 | 5 |
Stress Score | 0 | 5 |
Length of Stay | 0 | 3 |
Coping Strategies | 0 | 7 |
# Total number of cells (rows x columns).
df.size
3939
# Data type of each column.
df.dtypes
Course object State object Gender object Mental Health Rating int64 Family History object Sleep Duration object Counselling Service Usage object Family Structure object First Time Away From Home object Importance of Prayer,Meditation int64 Stress Score int64 Length of Stay object Coping Strategies object dtype: object
# Column overview: non-null counts, dtypes and memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'> Index: 303 entries, 0 to 314 Data columns (total 13 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Course 303 non-null object 1 State 303 non-null object 2 Gender 303 non-null object 3 Mental Health Rating 303 non-null int64 4 Family History 303 non-null object 5 Sleep Duration 303 non-null object 6 Counselling Service Usage 303 non-null object 7 Family Structure 303 non-null object 8 First Time Away From Home 303 non-null object 9 Importance of Prayer,Meditation 303 non-null int64 10 Stress Score 303 non-null int64 11 Length of Stay 303 non-null object 12 Coping Strategies 303 non-null object dtypes: int64(3), object(10) memory usage: 33.1+ KB
# Summary statistics for the numeric columns.
df.describe()
Mental Health Rating | Importance of Prayer,Meditation | Stress Score | |
---|---|---|---|
count | 303.000000 | 303.000000 | 303.000000 |
mean | 3.069307 | 3.224422 | 3.587459 |
std | 1.144443 | 1.202562 | 1.147171 |
min | 1.000000 | 1.000000 | 1.000000 |
25% | 2.000000 | 2.000000 | 3.000000 |
50% | 3.000000 | 3.000000 | 4.000000 |
75% | 4.000000 | 4.000000 | 4.000000 |
max | 5.000000 | 5.000000 | 5.000000 |
Observation: The 25th percentile of the stress score is 3, which means that at least 75% of respondents report average or above-average stress.
# Re-check the (rows, columns) of the data frame.
df.shape
(303, 13)
# Preview the first five rows of the data frame.
df.head()
Course | State | Gender | Mental Health Rating | Family History | Sleep Duration | Counselling Service Usage | Family Structure | First Time Away From Home | Importance of Prayer,Meditation | Stress Score | Length of Stay | Coping Strategies | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | MSc DS | Kerala | Male | 4 | No | 4-6 hours | No | Joint Family | Yes | 4 | 4 | Less than 6 months | Meditation |
1 | MSc DS | Kerala | Female | 2 | No | Less than 4 hours | No | Nuclear Family | No | 3 | 4 | Less than 6 months | Listening music |
2 | MSc DS | Uttar Pradesh | Female | 4 | No | 4-6 hours | No | Nuclear Family | No | 3 | 4 | Less than 6 months | Listening music |
3 | MSc DS | Kerala | Female | 2 | No | 7-8 hours | Yes | Nuclear Family | No | 3 | 4 | Less than 6 months | Sleeping |
5 | MSc DS | West Bengal | Female | 1 | No | 4-6 hours | No | Nuclear Family | Yes | 3 | 5 | Less than 6 months | Watching movies/series |
# Save the cleaned responses and reload them from disk.
# index=False keeps the row index out of the CSV; without it the reloaded
# frame gains a stray "Unnamed: 0" column.
df.to_csv("cleaned_data.csv", index=False)
df = pd.read_csv("cleaned_data.csv")
import seaborn as sns
import matplotlib.pyplot as plt

# Collapse the 1-5 stress score into three bands:
# 1-2 -> low, 3 -> moderate, 4-5 -> high.
df["Stress Score"] = df["Stress Score"].replace(
    {
        1: "Low_stress",
        2: "Low_stress",
        3: "Moderate_stress",
        4: "High_stress",
        5: "High_stress",
    }
)
df["Course"] = df["Course"].replace("MSc Global Finance and Analytics", "Msc GFA")

# Grouped bar chart: one group per course, one bar per stress band,
# bar height expressed as a percentage.
plt.figure(figsize=(12, 6))
sns.countplot(x="Course", hue="Stress Score", data=df, palette="dark:red", stat="percent")
plt.xlabel("Course")  # the x-axis holds courses (it was mislabelled "stress")
plt.ylabel("Percent")
plt.title("Comparison of stress over the courses")
plt.xticks(rotation=45)
plt.legend(title="Legend")
plt.show()
Insight: Respondents in MBA, MSc DS, and BA LLB report the highest stress levels in the college, in that order (descending).
# Reload the saved data before plotting.
df = pd.read_csv(filepath_or_buffer="cleaned_data.csv")

# Strip plot of stress score against mental health rating; jitter and
# transparency help reveal overlapping points.
plt.figure(figsize=(10, 5))
sns.stripplot(
    data=df,
    x="Mental Health Rating",
    y="Stress Score",
    size=5,
    jitter=0.2,
    alpha=0.7,
)
plt.title("Mental Health Rating Vs Stress Score")
plt.xlabel("Mental Health Rating")
plt.ylabel("Stress Score")
plt.show()
plt.close()
Observation: Many of the respondents who claimed to have good mental health also had high stress levels. This suggests a lack of awareness, as people seem unable to identify their own mental health situation.
# Collapse the 1-5 mental health rating into three bands:
# 1-2 -> low, 3 -> average, 4-5 -> good.
df["Mental Health Rating"] = df["Mental Health Rating"].replace(
    {
        1: "Low_Mental_Health",
        2: "Low_Mental_Health",
        3: "Average_Mental_Health",
        4: "Good_Mental_Health",
        5: "Good_Mental_Health",
    }
)

# Count respondents per band and order the bands from smallest to largest.
mental_health_count = df.value_counts(["Mental Health Rating"])
mental_health_frame = (
    mental_health_count.to_frame()
    .reset_index()
    .sort_values(by="count", ascending=True)
)
mental_health_frame
Mental Health Rating | count | |
---|---|---|
2 | Average_Mental_Health | 74 |
1 | Low_Mental_Health | 110 |
0 | Good_Mental_Health | 119 |
# Pie chart of the share of respondents in each mental health band.
label_data = mental_health_frame["Mental Health Rating"]
count_data = mental_health_frame["count"]
plt.pie(
    count_data,
    labels=label_data,
    autopct='%1.1f%%',
    wedgeprops=dict(linewidth=2, edgecolor='white'),  # white gaps between wedges
    colors=["red", "darkgreen", "hotpink", "purple", "orange"],
    startangle=0,
)
plt.title("Mental Health Rating among Students")
plt.show()
Insight: Approx. 40% of respondents have good mental health
# Collapse the 1-5 stress score into three bands:
# 1-2 -> low, 3 -> moderate, 4-5 -> high.
df["Stress Score"] = df["Stress Score"].replace(
    {
        1: "Low_stress",
        2: "Low_stress",
        3: "Moderate_stress",
        4: "High_stress",
        5: "High_stress",
    }
)

# Count respondents per band and order the bands from smallest to largest.
stress_count = df.value_counts(["Stress Score"])
stress_frame = (
    stress_count.to_frame()
    .reset_index()
    .sort_values(by="count", ascending=True)
)
stress_frame
Stress Score | count | |
---|---|---|
2 | Low_stress | 55 |
1 | Moderate_stress | 71 |
0 | High_stress | 177 |
# Pie chart of the share of respondents in each stress band.
label_data = stress_frame["Stress Score"]
count_data = stress_frame["count"]
plt.pie(
    count_data,
    labels=label_data,
    autopct='%1.1f%%',
    wedgeprops=dict(linewidth=2, edgecolor='white'),  # white gaps between wedges
    colors=["red", "darkgreen", "hotpink", "purple", "orange"],
    startangle=0,
)
plt.title("Stress Score among Students")
plt.show()
Insight: High Stress levels among students. Approx 58% of respondents report high stress.
import plotly.express as px

# Count respondents for every (length of stay, stress score) combination.
df2 = df.value_counts(["Length of Stay", "Stress Score"])
df3 = df2.to_frame().reset_index()
# %pip install nbformat

# Bubble chart: marker size encodes the number of respondents per combination.
fig = px.scatter(
    data_frame=df3,
    x="Stress Score",
    y="Length of Stay",
    size="count",
    size_max=60,
)
fig.update_layout(title_text="Length of stay Vs Stress", showlegend=True)
fig.show()
Insight: As one gets used to the place, their stress levels go down. Among the newcomers, 58% of respondents experienced high levels of stress. 66% of respondents who have stayed here for more than 1 year experience high stress. However, only 30% of the people who have been in Lavasa for more than 2 years experience high stress. Those who have been here for more than 2 years are in courses such as BA LLB. With more time, they are able to get accustomed to Lavasa. The other categories, however, include a large percentage of students in Master's courses, who do not have the luxury of time in Lavasa.
# Strip plot of mental health rating against counselling service usage.
sns.stripplot(
    data=df,
    x="Counselling Service Usage",
    y="Mental Health Rating",
    size=3,
    jitter=0.3,
)
plt.xlabel('Utilization of Counselling')
plt.ylabel('Mental Health Rating')
plt.title("Mental Health Rating Vs Counselling Usage")
plt.grid(True)
plt.show()
Insight: Irrespective of Mental health, a majority of respondents are not using the counselling services provided.
# Percentage of respondents per mental health rating, split by whether they
# used the counselling service.
plt.figure(figsize=(5, 5))
sns.countplot(
    data=df,
    x="Mental Health Rating",
    hue="Counselling Service Usage",
    palette="dark",
    stat="percent",
)
plt.title("Mental Health Rating and using of Counselling service")
plt.xticks(rotation=10)
plt.show()

# Collapse the 1-5 importance score into three bands:
# 1-2 -> low, 3 -> moderate, 4-5 -> high.
df["Importance of Prayer,Meditation"] = df["Importance of Prayer,Meditation"].replace(
    {
        1: "Low_Importance",
        2: "Low_Importance",
        3: "Moderate_Importance",
        4: "High_Importance",
        5: "High_Importance",
    }
)

# Percentage of respondents per stress score, split by importance band.
sns.countplot(
    data=df,
    x="Stress Score",
    hue="Importance of Prayer,Meditation",
    stat="percent",
)
sns.despine(top=True)
plt.title("How Prayer, Meditation affects Stress")
plt.show()
Insight: Respondents who report high stress levels also place higher importance on prayer and meditation.
# Reload the saved data before plotting.
df = pd.read_csv("cleaned_data.csv")

# Keep only respondents who answered "No" to using the counselling service.
g1 = df.set_index("Counselling Service Usage")
used_g_form = g1.loc[["No"]]

# Histogram of the stress score for that group. The axis labels and title
# describe the stress score, so that is the column plotted (the previous
# version plotted "Mental Health Rating" under these labels).
# Bin edges at x.5 centre each bar on an integer score from 1 to 5.
plt.figure(figsize=(5, 5))
plt.hist(
    used_g_form["Stress Score"],
    edgecolor="white",
    bins=[0.5, 1.5, 2.5, 3.5, 4.5, 5.5],
    color="red",
)
plt.xlabel("Stress Score")
plt.ylabel("No. of Students not using Counselling Services")
plt.title("Stress Score among students not using Counselling service")
plt.show()
Insight: Among the people who are not using the counselling service, there is a high amount of stress. Therefore, something appears to be pulling students away from seeking help.
# Merge the four sleep-duration answers into two bands.
# The result is assigned back to the column: calling replace(inplace=True)
# on df["Sleep Duration"] is a chained in-place operation, which warns on
# recent pandas and leaves df unchanged under Copy-on-Write.
df["Sleep Duration"] = df["Sleep Duration"].replace(
    {
        "Less than 4 hours": "Less than 6 hr",
        "4-6 hours": "Less than 6 hr",
        "7-8 hours": "6hr or more",
        "More than 8 hours": "6hr or more",
    }
)

# Number of respondents per mental health rating, split by sleep band.
plt.figure(figsize=(5, 5))
sns.countplot(hue="Sleep Duration", x="Mental Health Rating", data=df, palette="pastel")
sns.despine(top=True)
plt.title("Sleep Duration Vs Mental Health Rating")
plt.show()
Observation: Those who have best and worst mental health tend to have a good amount of sleep. For other categories data is inconclusive.
# Number of respondents per stress score, split by sleep duration.
plt.figure(figsize=(5, 5))
sns.countplot(
    data=df,
    x="Stress Score",
    hue="Sleep Duration",
    palette="bright",
)
sns.despine(top=True, right=True)
plt.title("Sleep Duration and Stress")
plt.show()
Observation: Less sleep is associated with more stress. A possible explanation for the anomaly among those with a stress score of 5 is that people tend to escape through sleep.
# Unused draft recoding, left disabled:
# df["Sleeping"]=df["Sleeping"].replace("7-8 hours","good")
# df["Sleeping"]=df["Sleeping"].replace("More than 8 hours","good")
# df["Sleeping"]=df["Sleeping"].replace("4-6 hours","modarate")
# df["Sleeping"]=df["Sleeping"].replace("Less than 4 hours","bad")

# Strip plot of course against sleep duration, coloured by gender.
sns.stripplot(
    data=df,
    x="Sleep Duration",
    y="Course",
    hue="Gender",
    size=4,
    jitter=0.4,
    alpha=0.8,
)
plt.xlabel('Sleep')
plt.ylabel('Courses')
plt.title('Influence of Course on Sleep Duration')
plt.grid(True)
plt.xticks(rotation=45, fontsize=8)
plt.show()
Observation: Sleep duration is lower among students in MSc DS, MBA, BA LLB, and BBA LLB.
Insight: Students in MBA and MSc Data Science report high levels of stress compared to others. Also, a large number of students overall report high levels of stress.
# Strip plot of mental health rating by gender.
sns.stripplot(
    data=df,
    x="Gender",
    y="Mental Health Rating",
    size=3,
    jitter=0.3,
    alpha=0.5,
)
plt.xlabel('Gender')
plt.ylabel("Mental Health Rating")
plt.title('Gender Vs Mental Health Rating ')
plt.show()

# Strip plot of stress score by gender.
sns.stripplot(
    data=df,
    x="Gender",
    y="Stress Score",
    size=3,
    jitter=0.3,
    alpha=0.5,
)
plt.xlabel('Gender')
plt.ylabel("Stress Score")
plt.title('Gender Vs Stress Level ')
plt.show()
Females are more stressed than males
# Strip plot of stress score, grouped by whether this is the respondent's
# first time staying away from home.
sns.stripplot(
    x=df["First Time Away From Home"],
    y=df["Stress Score"],
    size=3,
    jitter=.3,
    alpha=0.5,
)
# Labels follow the plotted variables: x is the first-time-away answer and
# y is the stress score (the two labels were previously swapped).
plt.xlabel('Staying away from home for first time?')
plt.ylabel('Stress Score')
plt.title('Effect of staying away from home for first time')
plt.show()
Those who stay away from home for the first time are more stressed
# Collapse the 1-5 mental health rating into three bands:
# 1-2 -> low, 3 -> moderate, 4-5 -> high.
df["Mental Health Rating"] = df["Mental Health Rating"].replace(
    {
        1: "low_mentalhealth",
        2: "low_mentalhealth",
        3: "moderate_mentalhealth",
        4: "high_mentalhealth",
        5: "high_mentalhealth",
    }
)

# Count respondents for each (family structure, mental health band) pair;
# combinations that never occur are filled with 0.
pair_sizes = df.groupby(['Family Structure', 'Mental Health Rating']).size()
pivot_table = pair_sizes.unstack(fill_value=0)

# Annotated heatmap of those counts (fmt='d' prints them as integers).
plt.figure(figsize=(12, 6))
sns.heatmap(data=pivot_table, annot=True, fmt='d', cmap='viridis')
plt.xlabel('mental health')
plt.ylabel('family type')
plt.title('Correlation between family type and mental health')
plt.show()
inconclusive evidence
# Mark "Other" gender answers as missing, then drop every row that has a
# missing value in any column.
df['Gender'] = df['Gender'].replace({"Other": np.nan})
df.dropna(axis=0, how="any", inplace=True)

# Split the respondents by gender and take each group's coping strategy.
is_male = df['Gender'] == 'Male'
is_female = df['Gender'] == 'Female'
male_data = df.loc[is_male]
female_data = df.loc[is_female]
male_satisfaction = male_data["Coping Strategies"]
female_satisfaction = female_data["Coping Strategies"]

# Draw both histograms on the same axes; the opposite `align` settings
# shift the two sets of bars in opposite directions.
plt.figure(figsize=(10, 4))
plt.hist(male_satisfaction, bins=30, alpha=1, label='Male', align='left')
plt.hist(female_satisfaction, bins=30, alpha=1, label='Female', align='right')
plt.xlabel('escape method from stress')
plt.ylabel('Frequency')
plt.title('Analysis of Coping Strategies')
plt.legend()
plt.show()
Insight: A large percentage of students prefer listening to music to get relief from stress. 33% of males and 38% of females prefer music as a way of escaping stress.